Data Cleaning - Geospatial Data

import pandas as pd
import os
import geopandas as gpd
import matplotlib.pyplot as plt
from geopy.geocoders import Nominatim
import folium
import ipywidgets as widgets
from IPython.display import display
# specify the directory containing the CSV files
directory = '../../data/quarterly_financials'

# create an empty list to store the dataframes
dfs = []

# loop over the CSV files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.csv'):
        # read the CSV file into a dataframe and append it to the list
        path = os.path.join(directory, filename)
        df = pd.read_csv(path)
        dfs.append(df)

# concatenate the dataframes into a single dataframe
quarterly_financials = pd.concat(dfs, ignore_index=True)
# sort
quarterly_financials = quarterly_financials.sort_values(by=['REPDTE', 'CERT'], ascending=[False, True])
quarterly_financials = quarterly_financials.reset_index(drop=True)
# print the combined dataframe
display(quarterly_financials.shape)
display(quarterly_financials.head())
(971541, 15)
ZIP BKCLASS REPDTE DEPDOM ASSET STNAME EQ NAME CITY ADDRESS ENDEFYMD CERT ESTYMD LIAB ID
0 2111 SM 20221231 163284000.0 298020000 MASSACHUSETTS 26579000.0 STATE STREET BANK&TRUST CO BOSTON 1 LINCOLN ST 99991231.0 14 17920101 271441000.0 14_20221231
1 36830 SM 20221231 952037.0 1023366 ALABAMA 65967.0 AUBURNBANK AUBURN 100 N GAY ST 99991231.0 35 19070103 957399.0 35_20221231
2 36732 NM 20221231 407949.0 444822 ALABAMA 32577.0 ROBERTSON BANKING CO DEMOPOLIS 216 N WALNUT AVE 99991231.0 39 18700101 412245.0 39_20221231
3 36867 NM 20221231 266874.0 265272 ALABAMA -8465.0 PHENIX-GIRARD BANK PHENIX CITY 801 13TH ST 99991231.0 41 19040504 273737.0 41_20221231
4 36401 NM 20221231 70649.0 76239 ALABAMA 5487.0 BANK OF EVERGREEN EVERGREEN 146 W FRONT ST 99991231.0 49 19320901 70752.0 49_20221231
display(len(quarterly_financials['BKCLASS'].unique()))
display(quarterly_financials['BKCLASS'].unique())
7
array(['SM', 'NM', 'N', 'SI', 'SB', 'SL', 'OI'], dtype=object)

Zip coordinates

# bring in
zip_coordinates = pd.read_csv('../../data/coordinate_data/2022_Gaz_zcta_national.txt', sep='\t')

# remove whitespace in col names
zip_coordinates.columns = [col.strip() for col in zip_coordinates.columns]

# drop unnecesary columns
zip_coordinates.drop(['ALAND', 'ALAND_SQMI', 'AWATER', 'AWATER_SQMI'], axis=1, inplace=True)

# check
display(zip_coordinates.shape)
display(zip_coordinates.head())
(33791, 3)
GEOID INTPTLAT INTPTLONG
0 601 18.180555 -66.749961
1 602 18.361945 -67.175597
2 603 18.457399 -67.124867
3 606 18.158327 -66.932928
4 610 18.293960 -67.127182
display(len(quarterly_financials['ZIP'].unique()))
10754

Combine

# Rename the 'GEOID' column in 'zip_coordinates' to match the 'ZIP' column in 'quarterly_financials'
zip_coordinates = zip_coordinates.rename(columns={'GEOID': 'ZIP'})

# Merge the two dataframes on the 'ZIP' column
merged_df = quarterly_financials.merge(zip_coordinates, on='ZIP', how='left')

# Create a new column 'coordinates' with the combined 'INTPTLAT' and 'INTPTLONG' columns as a tuple
merged_df['zip_coordinates'] = list(zip(merged_df['INTPTLAT'], merged_df['INTPTLONG']))

# Update the 'quarterly_financials' dataframe with the new 'coordinates' column
quarterly_financials = merged_df

# Print the updated 'quarterly_financials' dataframe
display(quarterly_financials.shape)
display(quarterly_financials.head())
(971541, 18)
ZIP BKCLASS REPDTE DEPDOM ASSET STNAME EQ NAME CITY ADDRESS ENDEFYMD CERT ESTYMD LIAB ID INTPTLAT INTPTLONG zip_coordinates
0 2111 SM 20221231 163284000.0 298020000 MASSACHUSETTS 26579000.0 STATE STREET BANK&TRUST CO BOSTON 1 LINCOLN ST 99991231.0 14 17920101 271441000.0 14_20221231 42.350680 -71.060527 (42.35068, -71.060527)
1 36830 SM 20221231 952037.0 1023366 ALABAMA 65967.0 AUBURNBANK AUBURN 100 N GAY ST 99991231.0 35 19070103 957399.0 35_20221231 32.534872 -85.493755 (32.534872, -85.493755)
2 36732 NM 20221231 407949.0 444822 ALABAMA 32577.0 ROBERTSON BANKING CO DEMOPOLIS 216 N WALNUT AVE 99991231.0 39 18700101 412245.0 39_20221231 32.417456 -87.892213 (32.417456, -87.892213)
3 36867 NM 20221231 266874.0 265272 ALABAMA -8465.0 PHENIX-GIRARD BANK PHENIX CITY 801 13TH ST 99991231.0 41 19040504 273737.0 41_20221231 32.498054 -85.023590 (32.498054, -85.02359)
4 36401 NM 20221231 70649.0 76239 ALABAMA 5487.0 BANK OF EVERGREEN EVERGREEN 146 W FRONT ST 99991231.0 49 19320901 70752.0 49_20221231 31.468970 -86.950426 (31.46897, -86.950426)
# removed unmatched zips
quarterly_financials.dropna(subset=['INTPTLAT'], inplace=True)

# drop unnecesary columns
quarterly_financials.drop(['INTPTLAT', 'INTPTLONG', 'ENDEFYMD', 'CERT', 'ESTYMD', 'LIAB', 'ID', 'ADDRESS', 'EQ', 'ZIP'], axis=1, inplace=True)

# convert numerical values to millions (source is in thousands)
quarterly_financials['DEPDOM'] = quarterly_financials['DEPDOM'].divide(1000)
quarterly_financials['ASSET'] = quarterly_financials['ASSET'].divide(1000)

# rename
quarterly_financials.rename(columns={
    'BKCLASS': 'bank_class', 
    'REPDTE': 'report_date', 
    'DEPDOM': 'deposits_mill', 
    'ASSET': 'assets_mill', 
    'STNAME': 'state', 
    'NAME': 'name',
    'CITY': 'city'
    }, inplace=True)

display(quarterly_financials.head())
bank_class report_date deposits_mill assets_mill state name city zip_coordinates
0 SM 20221231 163284.000 298020.000 MASSACHUSETTS STATE STREET BANK&TRUST CO BOSTON (42.35068, -71.060527)
1 SM 20221231 952.037 1023.366 ALABAMA AUBURNBANK AUBURN (32.534872, -85.493755)
2 NM 20221231 407.949 444.822 ALABAMA ROBERTSON BANKING CO DEMOPOLIS (32.417456, -87.892213)
3 NM 20221231 266.874 265.272 ALABAMA PHENIX-GIRARD BANK PHENIX CITY (32.498054, -85.02359)
4 NM 20221231 70.649 76.239 ALABAMA BANK OF EVERGREEN EVERGREEN (31.46897, -86.950426)
# change to title case
quarterly_financials[['state', 'name', 'city']] = quarterly_financials[['state', 'name', 'city']].apply(lambda x: x.str.title())

# format date
quarterly_financials['report_date'] = pd.to_datetime(quarterly_financials['report_date'], format='%Y%m%d')

# set to float
quarterly_financials['deposits_mill'] = quarterly_financials['deposits_mill'].astype(float)
quarterly_financials['assets_mill'] = quarterly_financials['assets_mill'].astype(float)


display(quarterly_financials.head())
bank_class report_date deposits_mill assets_mill state name city zip_coordinates
0 SM 2022-12-31 163284.000 298020.000 Massachusetts State Street Bank&Trust Co Boston (42.35068, -71.060527)
1 SM 2022-12-31 952.037 1023.366 Alabama Auburnbank Auburn (32.534872, -85.493755)
2 NM 2022-12-31 407.949 444.822 Alabama Robertson Banking Co Demopolis (32.417456, -87.892213)
3 NM 2022-12-31 266.874 265.272 Alabama Phenix-Girard Bank Phenix City (32.498054, -85.02359)
4 NM 2022-12-31 70.649 76.239 Alabama Bank Of Evergreen Evergreen (31.46897, -86.950426)
quarterly_financials['bank_class'] = quarterly_financials['bank_class'].replace({
    'N':  'Commercial bank, national charter, Fed member',
    'NM': 'Commercial bank, state charter, Fed non-member',
    'OI': 'Insured U.S. branch of a foreign chartered institution',
    'SB': 'Federal savings banks',
    'SI': 'State chartered stock savings banks',
    'SL': 'State chartered stock savings and loan association',
    'SM': 'Commercial bank, state charter, Fed member',
    'NC': 'Noninsured non-deposit commercial bank',
    'NS': 'Noninsured stock savings bank',
    'CU': 'State or federally chartered credit union',
    })

display(quarterly_financials.shape)
display(quarterly_financials.head())
(937293, 8)
bank_class report_date deposits_mill assets_mill state name city zip_coordinates
0 Commercial bank, state charter, Fed member 2022-12-31 163284.000 298020.000 Massachusetts State Street Bank&Trust Co Boston (42.35068, -71.060527)
1 Commercial bank, state charter, Fed member 2022-12-31 952.037 1023.366 Alabama Auburnbank Auburn (32.534872, -85.493755)
2 Commercial bank, state charter, Fed non-member 2022-12-31 407.949 444.822 Alabama Robertson Banking Co Demopolis (32.417456, -87.892213)
3 Commercial bank, state charter, Fed non-member 2022-12-31 266.874 265.272 Alabama Phenix-Girard Bank Phenix City (32.498054, -85.02359)
4 Commercial bank, state charter, Fed non-member 2022-12-31 70.649 76.239 Alabama Bank Of Evergreen Evergreen (31.46897, -86.950426)
quarterly_financials['zip_coordinates'].isna().sum()
0

Growth of Assets with Slider

from ipywidgets.embed import embed_minimal_html
import branca
from IPython.display import HTML


gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))


def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m



# Get the unique report dates sorted
unique_dates = sorted(quarterly_financials['report_date'].unique())

# Create the index slider
index_slider = widgets.IntSlider(
    min=0,
    max=len(unique_dates) - 1,
    description='Date Index:',
    continuous_update=False,
)

# Create the play button
play_button = widgets.Play(
    interval=100,  # Time in milliseconds between updates
    value=0,  # Slider starting value
    min=0,
    max=len(unique_dates) - 1,
    step=1,
    description="Press play",
    disabled=False
)

# Link the play button to the index slider
widgets.jslink((play_button, 'value'), (index_slider, 'value'))

# Create a horizontal box to display the slider and play button together
slider_with_play = widgets.HBox([index_slider, play_button])

def display_map(index):
    date = unique_dates[index]
    m = plot_bank_assets_by_date(date)
    display(m)

# Use widgets.interactive() and assign the output to a variable
interactive_map = widgets.interactive(display_map, index=index_slider)

# Clear the output of interactive_map (to prevent double display of the map)
interactive_map.update()


# Display the slider with the play button and the interactive map
display(slider_with_play)
display(interactive_map.children[-1])


import folium
import geopandas as gpd
import pandas as pd
from IPython.display import display
from ipywidgets import GridBox, Layout, HTML
import ipywidgets as widgets

gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    filtered_gdf = gdf[gdf['report_date'] == date]

    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    m = folium.Map(location=[37.8, -96], zoom_start=4)

    def scale_bubble_size(assets):
        return assets / 50000

    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    return m

# The dates to create the maps for
dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

# Create the maps for each date
maps = [plot_bank_assets_by_date(date) for date in dates]

# Wrap the map HTML strings with ipywidgets.HTML
map_widgets = [HTML(value=map._repr_html_()) for map in maps]

# Display the maps in a 2x2 grid
grid_maps = GridBox(map_widgets, layout=Layout(grid_template_columns="repeat(2, 1fr)"))
display(grid_maps)
import folium
import geopandas as gpd
import pandas as pd
import ipywidgets as widgets
from IPython.display import display


gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(
    quarterly_financials.zip_coordinates.apply(lambda p: p[1]), 
    quarterly_financials.zip_coordinates.apply(lambda p: p[0])
))


def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Return the map
    return m


# Define the four dates to display
dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

# Create a list of output widgets containing the Folium maps for each date
outputs = [widgets.Output() for _ in range(4)]

# Create a 2x2 grid of output widgets and add the Folium maps to them
grid = widgets.GridBox(outputs, layout=widgets.Layout(grid_template_columns="repeat(2, 1fr)", grid_gap="20px"))
for i, output in enumerate(outputs):
    with output:
        display(plot_bank_assets_by_date(dates[i]))

# Display the grid
display(grid)
import folium
import geopandas as gpd
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML


gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(
    quarterly_financials.zip_coordinates.apply(lambda p: p[1]), 
    quarterly_financials.zip_coordinates.apply(lambda p: p[0])
))


def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Return the map as an HTML string
    return m._repr_html_()


# Define the four dates to display
dates = ['1992-12-31', '2002-12-31', '2012-12-30', '2022-12-31']

# Create a list of output widgets containing the Folium maps for each date
outputs = [widgets.Output() for _ in range(4)]

# Add the Folium maps to the output widgets
for i, output in enumerate(outputs):
    with output:
        display(HTML(plot_bank_assets_by_date(dates[i])))

# Create a VBox to group the output widgets vertically
vbox1 = widgets.VBox([outputs[0], outputs[1]])
vbox2 = widgets.VBox([outputs[2], outputs[3]])

# Create an HBox to group the two VBoxes horizontally
hbox = widgets.HBox([vbox1, vbox2])

# Display the HBox
display(hbox)
import folium
import geopandas as gpd
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML


gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(
    quarterly_financials.zip_coordinates.apply(lambda p: p[1]), 
    quarterly_financials.zip_coordinates.apply(lambda p: p[0])
))


def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Return the map as an HTML string
    return m._repr_html_()


# Define the four dates to display
dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

# Create a list of output widgets containing the Folium maps for each date
outputs = [widgets.Output() for _ in range(4)]

# Add the Folium maps to the output widgets
for i, output in enumerate(outputs):
    with output:
        display(HTML(plot_bank_assets_by_date(dates[i])))

# Create an HBox to display the output widgets in a row
hbox = widgets.HBox(outputs)

# Display the HBox
display(hbox)
from ipywidgets.embed import embed_minimal_html
import branca
from IPython.display import HTML, clear_output, display
import base64
from io import BytesIO
from IPython.display import IFrame

gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m

unique_dates = sorted(quarterly_financials['report_date'].unique())

latest_date = unique_dates[-1]

map_display_id = display(plot_bank_assets_by_date(latest_date), display_id=True)


def folium_map_to_iframe(m):
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return IFrame(f"data:text/html;base64,{b64}", width="100%", height="400")


def on_slider_change(change):
    pass


# Display the latest map
iframe = folium_map_to_iframe(plot_bank_assets_by_date(latest_date))
map_display_id.update(iframe)
from ipywidgets.embed import embed_minimal_html
import branca
from IPython.display import HTML, clear_output, display
import base64
from io import BytesIO
from IPython.display import IFrame
import pandas as pd
import geopandas as gpd
import folium


# Convert quarterly_financials to a GeoDataFrame
gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m


unique_dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

maps = []

for date in unique_dates:
    maps.append(plot_bank_assets_by_date(pd.to_datetime(date)))

for i, m in enumerate(maps):
    iframe = folium_map_to_iframe(m)
    display(HTML(f"<h2>{unique_dates[i]}</h2>"))
    display(iframe)
    display(HTML("<hr>"))
    

def folium_map_to_iframe(m):
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return IFrame(f"data:text/html;base64,{b64}", width="100%", height="400")

1992-12-31


2002-12-31


2012-12-31


2022-12-31


from ipywidgets.embed import embed_minimal_html
import branca
from IPython.display import HTML, clear_output, display
import base64
from io import BytesIO
from IPython.display import IFrame
import pandas as pd
import geopandas as gpd
import folium

# Convert quarterly_financials to a GeoDataFrame
gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Return the map
    return m

def folium_map_to_html(m):
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return f'<iframe src="data:text/html;base64,{b64}" width="100%" height="400"></iframe>'

unique_dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

maps = []

for date in unique_dates:
    maps.append(plot_bank_assets_by_date(pd.to_datetime(date)))

table_html = '<table><tr>'
for i, m in enumerate(maps):
    table_html += f'<td>{folium_map_to_html(m)}</td>'
    if (i + 1) % 2 == 0:
        table_html += '</tr><tr>'
table_html += '</tr></table>'

display(HTML(table_html))
from IPython.display import display, HTML
from io import BytesIO
import base64
import pandas as pd
import geopandas as gpd
import folium


# Convert quarterly_financials to a GeoDataFrame
gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m


unique_dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

maps = []

for date in unique_dates:
    maps.append(plot_bank_assets_by_date(pd.to_datetime(date)))


def folium_map_to_iframe_data(m):
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return f'<iframe src="data:text/html;base64,{b64}" width="100%" height="400"></iframe>'

table_html = '<table><tr>'
for i, m in enumerate(maps):
    iframe_data = folium_map_to_iframe_data(m)
    table_html += f'<td><h3>{unique_dates[i]}</h3>{iframe_data}</td>'
    if (i + 1) % 2 == 0:
        table_html += '</tr><tr>'
table_html += '</tr></table>'

display(HTML(table_html))

1992-12-31

2002-12-31

2012-12-31

2022-12-31

from IPython.display import display, HTML
from io import BytesIO
import base64
import os
import pandas as pd
import geopandas as gpd
import folium


# Convert quarterly_financials to a GeoDataFrame
gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m


unique_dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

maps = []

for date in unique_dates:
    maps.append(plot_bank_assets_by_date(pd.to_datetime(date)))

    
def folium_map_to_iframe_data(m, map_name):
    map_path = f'{map_name}.html'
    m.save(map_path)
    IFrame(m, width=700, height=600)
    display(m)


for i, m in enumerate(maps):
    map_name = f'map_{i}'
    iframe_data = folium_map_to_iframe_data(m, map_name)
Make this Notebook Trusted to load map: File -> Trust Notebook
Make this Notebook Trusted to load map: File -> Trust Notebook
Make this Notebook Trusted to load map: File -> Trust Notebook
Make this Notebook Trusted to load map: File -> Trust Notebook
from IPython.display import display, HTML
from io import BytesIO
import base64
import os
import pandas as pd
import geopandas as gpd
import folium


# Convert quarterly_financials to a GeoDataFrame
gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m


unique_dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

maps = []

for date in unique_dates:
    maps.append(plot_bank_assets_by_date(pd.to_datetime(date)))

    
def folium_map_to_iframe_data(m, map_name):
    map_path = f'{map_name}.html'
    m.save(map_path)
    iframe = IFrame(src=map_path, width=350, height=300)
    return iframe

# Create a 2-column layout for the iframes
display(HTML("""
<style>
    .map-container {
        display: flex;
        flex-wrap: wrap;
        justify-content: space-around;
    }
    .map-box {
        width: 50%;
        padding: 5px;
    }
</style>
<div class="map-container">
"""))

for i, m in enumerate(maps):
    map_name = f'map_{i}'
    iframe_data = folium_map_to_iframe_data(m, map_name)
    display(HTML(f'<div class="map-box">{iframe_data._repr_html_()}</div>'))

# Close the map-container div
display(HTML('</div>'))
from IPython.display import display, HTML
from io import BytesIO
import base64
import os
import pandas as pd
import geopandas as gpd
import folium


# Convert quarterly_financials to a GeoDataFrame
gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=3)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m


unique_dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

maps = []

for date in unique_dates:
    maps.append(plot_bank_assets_by_date(pd.to_datetime(date)))

    

def folium_map_to_iframe_data(m, map_name):
    map_path = f'{map_name}.html'
    m.save(map_path)
    iframe = IFrame(src=map_path, width=350, height=300)
    return iframe

# Initialize the HTML structure for the 2-column layout
html_structure = """
<style>
    .map-container {
        display: flex;
        flex-wrap: wrap;
    }
    .map-box {
        width: 50%;
        padding: 5px;
        box-sizing: border-box;
    }
</style>
<div class="map-container">
"""

# Add the iframes to the HTML structure
for i, m in enumerate(maps):
    map_name = f'map_{i}'
    iframe_data = folium_map_to_iframe_data(m, map_name)
    html_structure += f'<div class="map-box">{iframe_data._repr_html_()}</div>'

# Close the map-container div and display the HTML structure
html_structure += '</div>'
display(HTML(html_structure))
from IPython.display import display, HTML
from io import BytesIO
import base64
import os
import pandas as pd
import geopandas as gpd
import folium


# Convert quarterly_financials to a GeoDataFrame
gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=3)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m


unique_dates = ['1992-12-31', '2002-12-31', '2012-12-31', '2022-12-31']

maps = []

for date in unique_dates:
    maps.append(plot_bank_assets_by_date(pd.to_datetime(date)))

    

def folium_map_to_iframe_data(m, map_name):
    map_path = f'{map_name}.html'
    m.save(map_path)
    iframe = IFrame(src=map_path, width=350, height=300)
    return iframe

# Initialize the HTML structure for the 2-column layout
html_structure = """
<style>
    .map-container {
        display: flex;
        flex-wrap: wrap;
    }
    .map-box {
        width: 50%;
        padding: 5px;
        box-sizing: border-box;
    }
    .map-title {
        text-align: center;
        font-weight: bold;
    }
</style>
<div class="map-container">
"""

# Add the iframes to the HTML structure with the date on top
for i, m in enumerate(maps):
    map_name = f'map_{i}'
    iframe_data = folium_map_to_iframe_data(m, map_name)
    date = unique_dates[i]
    html_structure += f'<div class="map-box"><div class="map-title">{date}</div>{iframe_data._repr_html_()}</div>'

# Close the map-container div and display the HTML structure
html_structure += '</div>'
display(HTML(html_structure))
1992-12-31
2002-12-31
2012-12-31
2022-12-31
from ipywidgets.embed import embed_minimal_html
import branca
from IPython.display import HTML, clear_output, display
import base64
from io import BytesIO
from IPython.display import IFrame

gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m

unique_dates = sorted(quarterly_financials['report_date'].unique())

index_slider = widgets.IntSlider(
    min=0,
    max=len(unique_dates) - 1,
    description='Date Index:',
    continuous_update=False,
)

play_button = widgets.Play(
    interval=100,  # Time in milliseconds between updates
    value=0,  # Slider starting value
    min=0,
    max=len(unique_dates) - 1,
    step=1,
    description="Press play",
    disabled=False
)

widgets.jslink((play_button, 'value'), (index_slider, 'value'))

slider_with_play = widgets.HBox([index_slider, play_button])


map_display_id = display(display_map(index_slider.value), display_id=True)


def folium_map_to_iframe(m):
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return IFrame(f"data:text/html;base64,{b64}", width="100%", height="400")


def display_map(index):
    date = unique_dates[index]
    m = plot_bank_assets_by_date(date)
    iframe = folium_map_to_iframe(m)  # Convert the Folium map to an iframe
    return iframe  # return the iframe instead of the map

def on_slider_change(change):
    iframe = display_map(change["new"])
    map_display_id.update(iframe)




index_slider.observe(on_slider_change, names="value")

# Trigger the initial display of the map
on_slider_change({"new": index_slider.value})

# Display the slider with the play button
display(slider_with_play)
from jinja2 import Template
import json

template = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Bank Assets Map</title>
    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.7.1/dist/leaflet.css" />
    <script src="https://unpkg.com/leaflet@1.7.1/dist/leaflet.js"></script>
    <style>
        #map {height: 400px; width: 100%;}
        #slider-container {margin: 10px 0;}
        #play-button {margin-left: 10px;}
    </style>
</head>
<body>
    <div id="slider-container">
        <input type="range" id="slider" min="0" max="{{ num_dates - 1 }}" value="0">
        <button id="play-button">Play</button>
    </div>
    <div id="map"></div>
    <script>
        const dates = {{ dates }};
        let map;
        let circles = [];
        let currentIndex = 0;
        let playInterval;

        function scaleBubbleSize(assets) {
            return assets / 50000;
        }

        function updateMap(index) {
            if (map) {
                circles.forEach(circle => {
                    map.removeLayer(circle);
                });
                circles = [];
            } else {
                map = L.map('map').setView([37.8, -96], 4);
            }

            const date = dates[index];
            const stateAssets = {{ state_assets_by_date_json }}[date];

            for (const state in stateAssets) {
                const assets = stateAssets[state];
                const centroid = {{ centroids_json }}[state];

                const circle = L.circleMarker(centroid, {
                    radius: scaleBubbleSize(assets),
                    color: 'blue',
                    fill: true,
                    fillColor: 'blue',
                    fillOpacity: 0.5
                }).bindPopup(`State: ${state}<br>Total Assets: ${assets.toFixed(0)}<br>Date: ${date}`).addTo(map);

                circles.push(circle);
            }
        }

        function onSliderChange() {
            currentIndex = parseInt(document.getElementById('slider').value);
            updateMap(currentIndex);
        }

        function onPlayButtonClick() {
            if (playInterval) {
                clearInterval(playInterval);
                playInterval = null;
                document.getElementById('play-button').textContent = 'Play';
            } else {
                playInterval = setInterval(() => {
                    currentIndex++;
                    if (currentIndex >= {{ num_dates }}) {
                        currentIndex = 0;
                    }
                    document.getElementById('slider').value = currentIndex;
                    updateMap(currentIndex);
                }, 100);
                document.getElementById('play-button').textContent = 'Pause';
            }
        }

        document.getElementById('slider').addEventListener('input', onSliderChange);
        document.getElementById('play-button').addEventListener('click', onPlayButtonClick
        });

        updateMap(currentIndex);
    </script>
</body>
</html>
 """

# Prepare the data for the template
unique_dates = sorted(quarterly_financials['report_date'].unique())
state_centroids = gdf.groupby('state').apply(lambda x: x.unary_union.centroid.coords[0]).to_dict()
state_assets_by_date = (gdf.groupby(['report_date', 'state'])['assets_mill']
                        .sum()
                        .unstack(fill_value=0)
                        .stack()
                        .reset_index(name='assets_mill')
                        .groupby('report_date')
                        .apply(lambda x: x.set_index('state')['assets_mill'].to_dict())
                        .to_dict())

state_assets_by_date = {k.strftime('%Y-%m-%d'): v for k, v in state_assets_by_date.items()}

# Render the template with the data
rendered_template = Template(template).render(
    num_dates=len(unique_dates),
    dates=unique_dates,
    centroids_json=json.dumps(state_centroids),
    state_assets_by_date_json=json.dumps(state_assets_by_date),
)

# Save the rendered template to a standalone HTML file
with open("map.html", "w") as f:
    f.write(rendered_template)
import base64
from io import BytesIO
import geopandas as gpd
import folium
import pandas as pd
from ipywidgets import interact_manual, IntSlider
from ipywidgets.embed import embed_minimal_html

# Load your data into the quarterly_financials variable
# ... your data loading code here ...

gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    filtered_gdf = gdf[gdf['report_date'] == date]
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    m = folium.Map(location=[37.8, -96], zoom_start=4)

    def scale_bubble_size(assets):
        return assets / 50000

    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    return m

unique_dates = sorted(quarterly_financials['report_date'].unique())

index_slider = IntSlider(
    min=0,
    max=len(unique_dates) - 1,
    description='Date Index:',
    continuous_update=False,
)

def display_map(index):
    date = unique_dates[index]
    m = plot_bank_assets_by_date(date)
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return f'<iframe src="data:text/html;base64,{b64}" width="100%" height="400"></iframe>'

# Wrap the display_map function with interact_manual
interact_manual_plot = interact_manual(display_map, index=index_slider)

# Export the plot and slider widgets to an HTML file
embed_minimal_html('output.html', views=[interact_manual_plot], title='Bank Assets by Date')
AttributeError: 'function' object has no attribute 'get_view_spec'
from ipywidgets.embed import embed_minimal_html
import branca
from IPython.display import HTML, clear_output
import base64
from io import BytesIO
from IPython.display import IFrame

gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m

unique_dates = sorted(quarterly_financials['report_date'].unique())

index_slider = widgets.IntSlider(
    min=0,
    max=len(unique_dates) - 1,
    description='Date Index:',
    continuous_update=False,
)

play_button = widgets.Play(
    interval=100,  # Time in milliseconds between updates
    value=0,  # Slider starting value
    min=0,
    max=len(unique_dates) - 1,
    step=1,
    description="Press play",
    disabled=False
)

widgets.jslink((play_button, 'value'), (index_slider, 'value'))

slider_with_play = widgets.HBox([index_slider, plasy_button])

def folium_map_to_iframe(m):
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return IFrame(f"data:text/html;base64,{b64}", width="100%", height="400")


def display_map(index):
    date = unique_dates[index]
    m = plot_bank_assets_by_date(date)
    iframe = folium_map_to_iframe(m)  # Convert the Folium map to an iframe
    return iframe  # return the iframe instead of the map

from ipywidgets import HTML as HTML_widget

# Create an HTML widget to display the map
map_html_widget = HTML_widget()

from ipywidgets import VBox
from ipywidgets.embed import embed_minimal_html

def on_slider_change(change):
    iframe = display_map(change["new"])
    map_html_widget.value = iframe._repr_html_()  # Update the HTML widget with the new iframe

# Trigger the initial display of the map
initial_iframe = display_map(index_slider.value)
map_html_widget.value = initial_iframe._repr_html_()

# Create a VBox containing the slider and the map
vbox = VBox([slider_with_play, map_html_widget])

# Display the VBox in the notebook
display(vbox)

# Export the VBox to a standalone HTML file
embed_minimal_html("output.html", views=[vbox], title="Interactive Map")

NameError: name 'plasy_button' is not defined
from ipywidgets.embed import embed_minimal_html
import branca
from IPython.display import HTML, clear_output
import base64
from io import BytesIO
from IPython.display import IFrame

gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m

unique_dates = sorted(quarterly_financials['report_date'].unique())

index_slider = widgets.IntSlider(
    min=0,
    max=len(unique_dates) - 1,
    description='Date Index:',
    continuous_update=False,
)

play_button = widgets.Play(
    interval=100,  # Time in milliseconds between updates
    value=0,  # Slider starting value
    min=0,
    max=len(unique_dates) - 1,
    step=1,
    description="Press play",
    disabled=False
)

widgets.jslink((play_button, 'value'), (index_slider, 'value'))

slider_with_play = widgets.HBox([index_slider, play_button])

def folium_map_to_iframe(m):
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return IFrame(f"data:text/html;base64,{b64}", width="100%", height="400")


def display_map(index):
    date = unique_dates[index]
    m = plot_bank_assets_by_date(date)
    iframe = folium_map_to_iframe(m)  # Convert the Folium map to an iframe
    return iframe  # return the iframe instead of the map

from ipywidgets import HTML as HTML_widget

# Create an HTML widget to display the map
map_html_widget = HTML_widget()

from ipywidgets import VBox
from ipywidgets.embed import embed_minimal_html

def on_slider_change(change):
    iframe = display_map(change["new"])
    map_html_widget.value = iframe._repr_html_()  # Update the HTML widget with the new iframe

index_slider.observe(on_slider_change, names='value')


# Trigger the initial display of the map
initial_iframe = display_map(index_slider.value)
map_html_widget.value = initial_iframe._repr_html_()

# Create a VBox containing the slider and the map
vbox = VBox([slider_with_play, map_html_widget])

# Display the VBox in the notebook
display(vbox)

# Export the VBox to a standalone HTML file
embed_minimal_html("output.html", views=[vbox], title="Interactive Map")

from ipywidgets.embed import embed_minimal_html
import branca
from IPython.display import HTML, clear_output
import base64
from io import BytesIO
from IPython.display import FileLink

gdf = gpd.GeoDataFrame(quarterly_financials, geometry=gpd.points_from_xy(quarterly_financials.zip_coordinates.apply(lambda p: p[1]), quarterly_financials.zip_coordinates.apply(lambda p: p[0])))

def plot_bank_assets_by_date(date):
    # Filter the GeoDataFrame by the selected date
    filtered_gdf = gdf[gdf['report_date'] == date]

    # Group the filtered GeoDataFrame by state and sum the assets
    state_assets = filtered_gdf.groupby('state')['assets_mill'].sum().reset_index()

    # Create the base folium map
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    # Define a function to scale the assets to a suitable size for the map
    def scale_bubble_size(assets):
        return assets / 50000

    # Plot bubbles for each state with a size proportional to the total assets
    for index, row in state_assets.iterrows():
        state_data = filtered_gdf[filtered_gdf['state'] == row['state']]
        state_centroid = state_data.unary_union.centroid
        folium.CircleMarker(
            location=[state_centroid.y, state_centroid.x],
            radius=scale_bubble_size(row['assets_mill']),
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.5,
            popup=f"State: {row['state']}<br>Total Assets: {row['assets_mill']:.0f}<br>Date: {date}"
        ).add_to(m)

    # Display the map
    return m

unique_dates = sorted(quarterly_financials['report_date'].unique())

index_slider = widgets.IntSlider(
    min=0,
    max=len(unique_dates) - 1,
    description='Date Index:',
    continuous_update=False,
)

play_button = widgets.Play(
    interval=100,  # Time in milliseconds between updates
    value=0,  # Slider starting value
    min=0,
    max=len(unique_dates) - 1,
    step=1,
    description="Press play",
    disabled=False
)

widgets.jslink((play_button, 'value'), (index_slider, 'value'))

slider_with_play = widgets.HBox([index_slider, play_button])

def folium_map_to_iframe(m):
    data = BytesIO()
    m.save(data, close_file=False)
    data.seek(0)
    b64 = base64.b64encode(data.read()).decode("utf-8")
    return IFrame(f"data:text/html;base64,{b64}", width="100%", height="400")


def display_map(index):
    date = unique_dates[index]
    m = plot_bank_assets_by_date(date)
    iframe = folium_map_to_iframe(m)  # Convert the Folium map to an iframe
    return iframe  # return the iframe instead of the map

from ipywidgets import HTML as HTML_widget

# Create an HTML widget to display the map
map_html_widget = HTML_widget()

from ipywidgets import VBox
from ipywidgets.embed import embed_minimal_html

def on_slider_change(change):
    iframe = display_map(change["new"])
    map_html_widget.value = iframe._repr_html_()  # Update the HTML widget with the new iframe

index_slider.observe(on_slider_change, names='value')


# Trigger the initial display of the map
initial_iframe = display_map(index_slider.value)
map_html_widget.value = initial_iframe._repr_html_()

# Create a VBox containing the slider and the map
vbox = VBox([slider_with_play, map_html_widget])

# Display the VBox in the notebook
display(vbox)

# Export the VBox to a standalone HTML file
embed_minimal_html("output.html", views=[vbox], title="Interactive Map")

Individual Points

Heatmap all dates

Latest date only

import folium
from folium.plugins import HeatMap
import pandas as pd
from IPython.display import IFrame

# Find the latest date
latest_date = quarterly_financials['report_date'].max()

# Filter the DataFrame by the latest date
latest_quarterly_financials = quarterly_financials[quarterly_financials['report_date'] == latest_date]

# Create a base map
map = folium.Map(location=[37.8, -96], zoom_start=4)

# Prepare the data for the HeatMap
heatmap_data = []
for index, row in latest_quarterly_financials.iterrows():
    coordinates = row['zip_coordinates']
    assets_mill = row['assets_mill']
    heatmap_data.append((*coordinates, assets_mill))

# Add the HeatMap to the map
heatmap = HeatMap(heatmap_data, radius=13, max_zoom=13, gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'})
map.add_child(heatmap)

# For rendering on Quarto
map.save('heatmap_latest_date.html')
IFrame(src='heatmap_latest_date.html', width=700, height=600)

# For rendering in Jupyter Notebook
display(map)
Make this Notebook Trusted to load map: File -> Trust Notebook
import folium
from folium.plugins import HeatMap
import pandas as pd
from IPython.display import IFrame

# Find the latest date
latest_date = quarterly_financials['report_date'].max()

# Filter the DataFrame by the latest date
latest_quarterly_financials = quarterly_financials[quarterly_financials['report_date'] == latest_date]

# Create a base map
map = folium.Map(location=[37.8, -96], zoom_start=4)

# Prepare the data for the HeatMap
heatmap_data = []
for index, row in latest_quarterly_financials.iterrows():
    coordinates = row['zip_coordinates']
    assets_mill = row['assets_mill']
    heatmap_data.append((*coordinates, assets_mill))

# Add the HeatMap to the map
heatmap = HeatMap(heatmap_data, radius=13, max_zoom=13, gradient={0.0: '#ffffb2', 0.25: '#fecc5c', 0.5: '#fd8d3c', 0.75: '#f03b20', 1.0: '#bd0026'}) # YlOrBr color scheme
map.add_child(heatmap)

# Function to scale the assets to a suitable size for the map
def scale_bubble_size(assets):
    return assets / 50000

# Add CircleMarkers with tooltips to the map
for index, row in latest_quarterly_financials.iterrows():
    coordinates = row['zip_coordinates']
    assets_mill = row['assets_mill']
    bank_name = row['name']
    tooltip_text = f"Bank: {bank_name}<br>Coordinates: {coordinates}<br>Assets (millions): {assets_mill}"
    folium.CircleMarker(
        location=coordinates,
        radius=scale_bubble_size(assets_mill),
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5,
        tooltip=tooltip_text,
    ).add_to(map)

# For rendering on Quarto
map.save('heatmap.html')
IFrame(src='heatmap.html', width=700, height=600)

# For rendering in Jupyter Notebook
display(map)
Make this Notebook Trusted to load map: File -> Trust Notebook